From 67bcd63bda9d7b095489a09b9880aa730ddb5488 Mon Sep 17 00:00:00 2001
From: David Robillard <d@drobilla.net>
Date: Fri, 7 Oct 2022 12:07:51 -0400
Subject: [PATCH] Port sord_validate to pcre2

---
 NEWS                |  3 ++-
 meson.build         |  6 ++---
 src/sord_config.h   | 14 +++++------
 src/sord_validate.c | 61 +++++++++++++++++++++++++++------------------
 4 files changed, 49 insertions(+), 35 deletions(-)

diff --git a/meson.build b/meson.build
index 669e180..2bfd814 100644
--- a/meson.build
+++ b/meson.build
@@ -117,14 +117,14 @@ if not get_option('tools').disabled()
 
   meson.override_find_program('sordi', sordi)
 
-  pcre_dep = dependency('libpcre', required: false)
+  pcre2_dep = dependency('libpcre2-8', required: false)
 
-  if pcre_dep.found()
+  if pcre2_dep.found()
     sord_validate = executable('sord_validate',
                                files('src/sord_validate.c'),
                                c_args: c_suppressions,
                                install: true,
-                               dependencies: [sord_dep, pcre_dep])
+                               dependencies: [sord_dep, pcre2_dep])
 
     meson.override_find_program('sord_validate', sord_validate)
   endif
diff --git a/src/sord_config.h b/src/sord_config.h
index 7853b4a..5b7a341 100644
--- a/src/sord_config.h
+++ b/src/sord_config.h
@@ -20,11 +20,11 @@
 
 #if !defined(SORD_NO_DEFAULT_CONFIG)
 
-// The validator uses PCRE for literal pattern matching
-#  ifndef HAVE_PCRE
+// The validator uses PCRE2 for literal pattern matching
+#  ifndef HAVE_PCRE2
 #    ifdef __has_include
-#      if __has_include(<pcre.h>)
-#        define HAVE_PCRE 1
+#      if __has_include(<pcre2.h>)
+#        define HAVE_PCRE2 1
 #      endif
 #    endif
 #  endif
@@ -39,10 +39,10 @@
   if the build system defines them all.
 */
 
-#ifdef HAVE_PCRE
-#  define USE_PCRE 1
+#ifdef HAVE_PCRE2
+#  define USE_PCRE2 1
 #else
-#  define USE_PCRE 0
+#  define USE_PCRE2 0
 #endif
 
 #endif // SORD_CONFIG_H
diff --git a/src/sord_validate.c b/src/sord_validate.c
index d899eb6..6d40138 100644
--- a/src/sord_validate.c
+++ b/src/sord_validate.c
@@ -8,8 +8,9 @@
 #include "sord/sord.h"
 #include "sord_config.h"
 
-#if USE_PCRE
-#  include <pcre.h>
+#if USE_PCRE2
+#  define PCRE2_CODE_UNIT_WIDTH 8
+#  include <pcre2.h>
 #endif
 
 #ifdef _WIN32
@@ -176,31 +177,43 @@ is_descendant_of(SordModel*      model,
 }
 
 static bool
-regexp_match(const uint8_t* pat, const char* str)
+regexp_match(const uint8_t* const pattern, const char* const str)
 {
-#if USE_PCRE
-  // Append a $ to the pattern so we only match if the entire string matches
-  const size_t len  = strlen((const char*)pat);
-  char* const  regx = (char*)malloc(len + 2);
-  memcpy(regx, pat, len);
-  regx[len]     = '$';
-  regx[len + 1] = '\0';
-
-  const char* err;
-  int         erroffset;
-  pcre*       re = pcre_compile(regx, PCRE_ANCHORED, &err, &erroffset, NULL);
-  free(regx);
+#if USE_PCRE2
+  static const uint32_t options = PCRE2_ANCHORED | PCRE2_ENDANCHORED;
+
+  int    err       = 0;
+  size_t erroffset = 0U;
+
+  pcre2_code* const re = pcre2_compile(
+    pattern, PCRE2_ZERO_TERMINATED, options, &err, &erroffset, NULL);
+
   if (!re) {
-    fprintf(
-      stderr, "Error in pattern `%s' at offset %d (%s)\n", pat, erroffset, err);
+    fprintf(stderr,
+            "Error in pattern `%s' at offset %lu (%d)\n",
+            pattern,
+            erroffset,
+            err);
     return false;
   }
 
-  const bool ret =
-    pcre_exec(re, NULL, str, (int)strlen(str), 0, 0, NULL, 0) >= 0;
-  pcre_free(re);
-  return ret;
-#endif // USE_PCRE
+  pcre2_match_data* const match_data =
+    pcre2_match_data_create_from_pattern(re, NULL);
+
+  const int rc = pcre2_match(re,
+                             (const uint8_t*)str,
+                             PCRE2_ZERO_TERMINATED,
+                             0,
+                             options,
+                             match_data,
+                             NULL);
+
+  pcre2_match_data_free(match_data);
+
+  pcre2_code_free(re);
+  return rc > 0;
+#endif // USE_PCRE2
+
   return true;
 }
 
@@ -776,8 +789,8 @@ main(int argc, char** argv)
   URI(xsd, pattern);
   URI(xsd, string);
 
-#if !USE_PCRE
-  fprintf(stderr, "warning: Built without PCRE, datatypes not checked.\n");
+#if !USE_PCRE2
+  fprintf(stderr, "warning: Built without PCRE2, datatypes not checked.\n");
 #endif
 
   const int prop_st = check_properties(model, &uris);
